#encoding=utf-8
__author__ = 'BK'
import os
import csv
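'''
Data-splitting script.

What it does:
Selects 1/4 of the users as the test set (if the rows of the info file are in
random order, the classes stay proportional).

The log and info files are split by user at a ratio of 1:3:
3/4 of the data is used for training, written as train_log.csv and train_info.csv
1/4 of the data is used for testing,  written as test_log.csv and test_info.csv
The output files are created in the current working directory (intended to be the cube directory).

Usage:
Just run the script.
seg() returns the userSet of the test set.
'''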

def _split_row(line):
    """Return the comma-separated fields of one raw line, stripped of surrounding whitespace."""
    return line.strip().split(',')


def _open_csv_out(name):
    """Open *name* for writing in the file mode the csv module expects."""
    import sys  # function-scope import: keeps this block self-contained

    if sys.version_info[0] < 3:
        # Python 2: csv.writer emits byte strings, so the file must be binary.
        return open(name, 'wb')
    # Python 3: csv.writer needs a text file with newline translation disabled.
    return open(name, 'w', newline='')


def _write_partition(rows, test_users, test_name, train_name):
    """Write each row to the test or the train CSV, keyed on its first field."""
    with _open_csv_out(test_name) as test_file:
        with _open_csv_out(train_name) as train_file:
            test_writer = csv.writer(test_file)
            train_writer = csv.writer(train_file)
            for row in rows:
                if row[0] in test_users:
                    test_writer.writerow(row)
                else:
                    train_writer.writerow(row)


def seg(proportion=0.25):
    """Split the sample log and info CSV files into test and train sets by user.

    Reads ``data/sample_info.csv`` and ``data/sample_log.csv`` from the parent
    of the current working directory. The users named in the first column of
    the leading ``proportion`` share of the info rows form the test set; all
    other users form the train set. Every info row and every log row is then
    routed by its first column into ``test_info.csv`` / ``train_info.csv`` and
    ``test_log.csv`` / ``train_log.csv``, created in the current working
    directory.

    No row is treated as a header: the first line of each input file is
    handled like any other row.

    Args:
        proportion: fraction of the info rows (taken from the top of the
            file) whose users go to the test set. Must be within [0, 1].
            Defaults to 0.25.

    Returns:
        set: the first-column values (user ids) selected for the test set.

    Raises:
        ValueError: if ``proportion`` is outside [0, 1].
        IOError / OSError: if an input file cannot be opened.
    """
    if not 0 <= proportion <= 1:
        raise ValueError("proportion must be within [0, 1], got %r" % (proportion,))

    data_dir = os.path.join(os.path.dirname(os.getcwd()), "data")
    log_path = os.path.join(data_dir, "sample_log.csv")
    info_path = os.path.join(data_dir, "sample_info.csv")

    # The info file is needed twice (to choose the users and to be split),
    # so it is parsed once and kept in memory.
    with open(info_path) as info_file:
        info_rows = [_split_row(line) for line in info_file]

    # The first `proportion` of the info rows defines the test users.
    test_count = int(len(info_rows) * proportion)
    test_users = set(row[0] for row in info_rows[:test_count])

    # Split the log: streamed line by line so the whole log is never held in memory.
    with open(log_path) as log_file:
        _write_partition(
            (_split_row(line) for line in log_file),
            test_users,
            'test_log.csv',
            'train_log.csv',
        )

    # Split the info rows.
    _write_partition(info_rows, test_users, 'test_info.csv', 'train_info.csv')

    return test_users

# Run the split on execution/import and report how many users went to the test set.
print(len(seg()))